/*
This code produces Appendix Figure B2 and Table B1.
The data includes:
-Plasma openings within a Xkm distance of each CBG centroid (treat_25k_cbg.dta)
-American Community Survey (ACS), annually (acs_tract_controls.dta)
-NETS data aggregated to the NAICs-CensusTract-Year level from 2001-2015 (for purchase, not publicly available) 
-SafeGraph data, counting the number of tracked plasma centers in each CBG (for purchase, not publicly available) 
*/


clear all
set more off

global cd ""  //Set the path to the root folder ending in RFSDelivery
global nets "${cd}/Data/NETS"  // NETS
global census "${cd}/ACS/Analaysis" // ACS
global ccp "${cd}/Data/Geography/TreatedControlPanel" // plasma centers

/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
											DATA CLEANING
 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
* NETS count of payday and pawn shops by tract, (1989-2014) ----------------------------------
use "${nets}/nets_est.dta", clear // NETS data aggregated to the NAICs-CensusTract-Year level (for purchase, not publicly available) 
* clean data
drop if year>2021
drop if missing(tract2000)
rename tract2000 tract
gen bankscu = banks + cu
drop banks cu
* collapse (sum) pawn payday banks_cu bars resto groc gas , by(tract year)
rename pawn ct_pawn
rename payday ct_payday
rename bankscu ct_bankscu
rename bars ct_bars
rename resto ct_resto
rename groc ct_groc
rename gas ct_gas
egen ct_allest = rowtotal(ct_pawn-ct_bankscu)
tab year // NETS spans 1989-2014
drop if year<2000
save nets_count_tract, replace

*  NETS count as of 2014
keep if year==2014
rename ct_pawn ct_pawn2014
rename ct_payday ct_payday2014 
rename ct_bankscu ct_bankscu2014   
rename ct_bars ct_bars2014
rename ct_resto ct_resto2014
rename ct_groc ct_groc2014
rename ct_gas ct_gas2014
rename ct_allest ct_allest2014
drop year
save nets_count_tract2014, replace

* SafeGraph (2018-2021) (establishment types captured at any point in period, used in crossection) --------------------------- 
* SafeGraph data, counting the number of tracked plasma centers in each CBG (for purchase, not publicly available) 
import delimited using "$nets\safegraph_cbg.csv", clear 
*tab topcat
g esttype="OtherEstab2018"
replace esttype="NonBankLender2018" if topcat=="Nondepository Credit Intermediation"
replace esttype="Bank2018" if topcat=="Depository Credit Intermediation"
replace esttype="Casino2018" if inlist(subcat, "Casino Hotels", "Casinos (except Casino Hotels)")
replace esttype="BarsLiquor2018" if inlist(subcat, "Beer, Wine, and Liquor Stores", "Drinking Places (Alcoholic Beverages)")
replace esttype="Childcare2018" if inlist(subcat, "Child Day Care Services", "Child and Youth Services")
replace esttype="CourtsParole2018" if inlist(subcat, "Courts", "Parole Offices and Probation Offices")
replace esttype="EmploymentAgencies2018" if subcat=="Employment Placement Agencies"
replace esttype="Shelters2018" if subcat=="Temporary Shelters"
replace esttype="Restaurants2018" if topcat=="Restaurants and Other Eating Places"
replace esttype="GroceryStores2018" if topcat=="Grocery Stores"
replace esttype="GasStations2018" if topcat=="Gasoline Stations"
format %12.0g cbg
gen str12 cbg2 = string(cbg,"%012.0f") // convert census block group to 12 digit string with leading zeros
drop cbg
rename cbg2 cbg
*covert cbg to tract to macth the ACS data
g tract = substr(cbg,1,11) 
collapse (sum) est_n , by(tract esttype)
rename est_n ct_
* transpose wide to match NETS structure
reshape wide ct_ , i(tract) j(esttype, string)
egen ct_AllEstab2018 = rowtotal(ct_Bank2018 ct_BarsLiquor2018 ct_Casino2018 ct_Childcare2018 ct_CourtsParole2018 ct_EmploymentAgencies2018 ct_GasStations2018 ct_GroceryStores2018 ct_NonBankLender2018 ct_OtherEstab2018 ct_Restaurants2018 ct_Shelters2018)
* replace missing with zeros
save safegraph_count_tract, replace

* Plasma openings (2000-2021)-------------------------------------------------------------------
/* treat_25k_cbg.dta contains plasma openings within a Xkm distance of each CBG centroid. To tranform this to a tract level
opening indicator, take the variable identifing number of plasma centers within a 1km distance of the CBG centroid.
Then aggregate to the tract level and create an indicator for when there is an increase in number of plasma centers
in the tract. This avoids double counting multiple CBGs within the same tracts that are within 5km of the same opening. */
use "$ccp/treat_25k_cbg.dta", clear
local d npc5 // npc5 npc10 npc20  (npc75 = 7.5km)
keep `d' cbg date
format %12.0g cbg
gen str12 cbg2 = string(cbg,"%012.0f") // convert census block group to 12 digit string with leading zeros
drop cbg
rename cbg2 cbg
*covert cbg to tract to macth the ACS data
g tract = substr(cbg,1,11) 
* convert date
generate new_date = dofc(date)
format new_date %td
* take year of date
g year = year(new_date)
* aggregate the count of plasma centers to the tract year level
collapse (sum) `d' , by(tract year)
rename `d' ct_plasma
tab year
drop if year>2021
save plasma_count_tract, replace

*  Plasma count as of 2014
keep if year==2014
rename ct_plasma ct_plasma2014
drop year
save plasma_count_tract2014, replace
*  Plasma count as of 2020
use plasma_count_tract, clear
keep if year==2020
rename ct_plasma ct_plasma2020
drop year
save plasma_count_tract2020, replace
*  Plasma count as of 2017 (for safegraph regressions must lag)
use plasma_count_tract, clear
keep if year==2017
rename ct_plasma ct_plasma2017
drop year
save plasma_count_tract2017, replace



* ACS, Census tract (base dataset, 2000-2021) -----------------------------------------------
use "$census/acs_tract_controls.dta", clear 
* convert tracts from numerber to string with 11 digits and leading zeros
format %11.0g tract
gen str11 tract2 = string(tract,"%011.0f")
drop tract
rename tract2 tract
tab year // ACS spans 2000-2020, but 2020 is duplicated in 2021 because 2021 ACS was not available at the time of analysis, so used 2020 in place (as stated in caption of Figure B2)

* merge in nets data by year and just for 2014
merge m:1 tract year using "$nets/nets_count_tract"
* keep if _merge!=2 // not dropping the tract/years that are in nets only 
drop _merge 
* merge in NETS 2014 snapshot
merge m:1 tract  using "$nets/nets_count_tract2014"
drop _merge 
* merge in safegraph data
merge m:1 tract  using "$nets/safegraph_count_tract"
drop _merge 
* merge in plasma 
merge m:1 tract year using "$nets/plasma_count_tract"
drop _merge 
* merge in plasma 2014 snapshot
merge m:1 tract  using "$nets/plasma_count_tract2014"
drop _merge 
* merge in plasma 2017 snapshot
merge m:1 tract  using "$nets/plasma_count_tract2017"
drop _merge 
* merge in plasma 2020 snapshot
merge m:1 tract  using "$nets/plasma_count_tract2020"
drop _merge 


* Data cleaning of merged data --------------------------------------------------------------------
* replace missing with zeros for count payday and pawn (since not existing is a zero count) and 
foreach b of varlist ct_pawn-ct_allest {
    replace `b' = 0 if missing(`b') & year<2015  // NETS data ends in 2014
}  
foreach b of varlist ct_pawn2014-border_tract {
    replace `b' = 0 if missing(`b')    
}  

* CREATE LEADS AND LAGS OF OPENINGS AND CLOSINGS
* calculate a dummy for at least 1 opening in that tract that year (must do lags here so that first-year missings aren't replaced by zeros above)
sort tract year
* payday
by tract: gen lag_ct_payday = ct_payday[_n-1]
g payday_open = (ct_payday>lag_ct_payday)
replace payday_open=. if missing(ct_payday) | missing(lag_ct_payday)
g payday_close = (ct_payday<lag_ct_payday)
replace payday_close=. if missing(ct_payday) | missing(lag_ct_payday)
* pawn
by tract: gen lag_ct_pawn = ct_pawn[_n-1]
g pawn_open = (ct_pawn>lag_ct_pawn)
replace pawn_open=. if missing(ct_pawn) | missing(lag_ct_pawn)
g pawn_close = (ct_pawn<lag_ct_pawn)
replace pawn_close=. if missing(ct_pawn) | missing(lag_ct_pawn)
* plasma
by tract: gen lag_ct_plasma = ct_plasma[_n-1]
g plasma_open = (ct_plasma>lag_ct_plasma)
replace plasma_open=. if missing(ct_plasma) | missing(lag_ct_plasma)
g plasma_close = (ct_plasma<lag_ct_plasma)
replace plasma_close=. if missing(ct_plasma) | missing(lag_ct_plasma)
* banks credit unions     
by tract: gen lag_ct_bankscu = ct_bankscu[_n-1]
* bars
by tract: gen lag_ct_bars = ct_bars[_n-1]
* resto
by tract: gen lag_ct_resto = ct_resto[_n-1]
* grocery
by tract: gen lag_ct_groc = ct_groc[_n-1]
* gas
by tract: gen lag_ct_gas = ct_gas[_n-1]
* all est
by tract: gen lag_ct_allest = ct_allest[_n-1]
* sum payday pawn
g lag_ct_paydaypawn = lag_ct_pawn + lag_ct_payday


* lag of plasma opening 
by tract: gen lag_plasma_open = plasma_open[_n-1]
* leading indicators of closing
* pawn
by tract: gen lead1_pawn_close = pawn_close[_n+1]
by tract: gen lead2_pawn_close = pawn_close[_n+2]
by tract: gen lead3_pawn_close = pawn_close[_n+3]
egen ct_y2_pawn_close = rowtotal(pawn_close lead1_pawn_close  )
egen ct_y3_pawn_close = rowtotal(pawn_close lead1_pawn_close lead2_pawn_close )
egen ct_y4_pawn_close = rowtotal(pawn_close lead1_pawn_close lead2_pawn_close lead3_pawn_close )
g y2_pawn_close_x100 = (ct_y2_pawn_close>0)*100
g y3_pawn_close_x100 = (ct_y3_pawn_close>0)*100
g y4_pawn_close_x100 = (ct_y4_pawn_close>0)*100
* payday
by tract: gen lead1_payday_close = payday_close[_n+1]
by tract: gen lead2_payday_close = payday_close[_n+2]
by tract: gen lead3_payday_close = payday_close[_n+3]
egen ct_y2_payday_close = rowtotal(payday_close lead1_payday_close  )
egen ct_y3_payday_close = rowtotal(payday_close lead1_payday_close lead2_payday_close )
egen ct_y4_payday_close = rowtotal(payday_close lead1_payday_close lead2_payday_close lead3_payday_close )
g y2_payday_close_x100 = (ct_y2_payday_close>0)*100
g y3_payday_close_x100 = (ct_y3_payday_close>0)*100
g y4_payday_close_x100 = (ct_y4_payday_close>0)*100
* leading indicators of opening
* pawn
by tract: gen lead1_pawn_open = pawn_open[_n+1]
by tract: gen lead2_pawn_open = pawn_open[_n+2]
by tract: gen lead3_pawn_open = pawn_open[_n+3]
egen ct_y2_pawn_open = rowtotal(pawn_open lead1_pawn_open  )
egen ct_y3_pawn_open = rowtotal(pawn_open lead1_pawn_open lead2_pawn_open )
egen ct_y4_pawn_open = rowtotal(pawn_open lead1_pawn_open lead2_pawn_open lead3_pawn_open )
g y2_pawn_open_x100 = (ct_y2_pawn_open>0)*100
g y3_pawn_open_x100 = (ct_y3_pawn_open>0)*100
g y4_pawn_open_x100 = (ct_y4_pawn_open>0)*100
* payday
by tract: gen lead1_payday_open = payday_open[_n+1]
by tract: gen lead2_payday_open = payday_open[_n+2]
by tract: gen lead3_payday_open = payday_open[_n+3]
egen ct_y2_payday_open = rowtotal(payday_open lead1_payday_open  )
egen ct_y3_payday_open = rowtotal(payday_open lead1_payday_open lead2_payday_open )
egen ct_y4_payday_open = rowtotal(payday_open lead1_payday_open lead2_payday_open lead3_payday_open )
g y2_payday_open_x100 = (ct_y2_payday_open>0)*100
g y3_payday_open_x100 = (ct_y3_payday_open>0)*100
g y4_payday_open_x100 = (ct_y4_payday_open>0)*100

*number of pawn and payday that open in the year(s) after a plasma center opens
* pawn
by tract: gen lead1_ct_pawn = ct_pawn[_n+1]
by tract: gen lead2_ct_pawn = ct_pawn[_n+2]
by tract: gen lead3_ct_pawn = ct_pawn[_n+3]
g d_ct_pawn = ct_pawn - lag_ct_pawn
g d_ct_y2_pawn = lead1_ct_pawn - lag_ct_pawn
g d_ct_y3_pawn = lead2_ct_pawn - lag_ct_pawn
g d_ct_y4_pawn = lead3_ct_pawn - lag_ct_pawn
* payday
by tract: gen lead1_ct_payday = ct_payday[_n+1]
by tract: gen lead2_ct_payday = ct_payday[_n+2]
by tract: gen lead3_ct_payday = ct_payday[_n+3]
g d_ct_payday = ct_payday - lag_ct_payday
g d_ct_y2_payday = lead1_ct_payday - lag_ct_payday
g d_ct_y3_payday = lead2_ct_payday - lag_ct_payday
g d_ct_y4_payday = lead3_ct_payday - lag_ct_payday

* 4 groups of years (for Figure B2)
g year_group=""
replace year_group="2001_2005" if (year>2000 & year<=2005)
replace year_group="2006_2010" if (year>2005 & year<=2010)
replace year_group="2011_2015" if (year>2010 & year<=2015)
replace year_group="2016_2021" if (year>2015 & year<=2021)

* variable tranformations
g log_inc_hhmean = log(inc_hhmean)
g log_pop2sqkm = log(pop2sqkm)
g minority_p = 1-white_p

* multiplied by 100 to make the coefficient estimates larger
g gini_x100 = gini*100

g plasma_open_x100 = plasma_open*100
tab plasma_open_x100 
g plasma_close_x100 = plasma_close*100
tab plasma_close_x100 
g payday_open_x100 = payday_open*100
tab payday_open_x100 
g payday_close_x100 = payday_close*100
tab payday_close_x100 
g pawn_open_x100 = pawn_open*100
tab pawn_open_x100 
g pawn_close_x100 = pawn_close*100
tab pawn_close_x100 

* drop out-of-sample years 
drop if year<=2000 // drop the first year since we can't track changes in opening before the first year of data
drop if year>2021 // remove 2022 since incomplete at the time of analysis

* remove if missing key Census variables (i.e., not enough people live there)
drop if missing(index) 
drop if missing(inc_hhmean)

* Group tracts into quartiles along socio-economic variables within each year from 2001-2020
foreach z of varlist  inc_hhmean ownocc_p minority_p  educ_bach_p educ_sc_p gini pvf200_p   empt_nft_p pop2sqkm    {
cap drop quart_`z'
gen quart_`z'=.
forvalues i=2001/2021 {
    capture drop xq
    xtile xq=`z' if year==`i', nq(4)
    replace quart_`z'=xq if year==`i'
}
}
drop xq


* SAVE FINAL DATASET
save final_location_analysis_data, replace


/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
											Figure B2
Frequency tables used to make excel graphs documenting where plasma centers tend to open and
whether these tendencies have evolved over time.
 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
 
use final_location_analysis_data, clear
keep if plasma_open==1

* Panel A: descriptive statistics on where opening occur by decile, by year group
local z  pop2sqkm_wq10 
eststo clear 
tab plasma_open `z', row nofreq 
by year_group, sort:  tab plasma_open `z', row nofreq 

* Panels B-D: descriptive statistics on where opening occur by quartiles of variables, by year group
local z   inc_hhmean //  ownocc_p minority_p 
eststo clear 
tab plasma_open quart_`z', row nofreq 
by year_group, sort:  tab plasma_open quart_`z', row nofreq 



/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
											Table B1
Establishment type correlates of plasma center opening locations
Are plasma centers more likely to open in areas that have more exposure to 
alternative financial service provider, conditional on other local-area socio-economic factors, 
X, like population, poverty rate, median income, unemployment, etc.... ?
 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
 
* Panel A NETS 2001-2015
use final_location_analysis_data, clear

global X_at     pvf100_p log_inc_hhmean aid_p pubai_p ownocc_p white_p educ_bach_p empt_ft_p log_pop2sqkm 
global Absorb fips#year   // state#year
global COUNT1_at  lag_ct_paydaypawn   
global COUNT4_at  lag_ct_paydaypawn  lag_ct_allest 
global COUNT5_at  lag_ct_paydaypawn  lag_ct_bankscu  lag_ct_resto lag_ct_groc lag_ct_bars lag_ct_gas  


eststo clear 
local pref   plasma_open_x100  payday_open_x100 payday_close_x100  pawn_open_x100 pawn_close_x100		 /* 		plasma_close_x100  */
foreach m of local pref {

eststo: quietly reghdfe `m' $COUNT1_at $X_at lag_ct_plasma , absorb( $Absorb ) vce(robust) 
estadd local Samp  	 "2001-2014" 
estadd local FE  	  "FIPSxY"  
estadd ysumm
eststo: quietly reghdfe `m' $COUNT4_at $X_at lag_ct_plasma , absorb( $Absorb ) vce(robust) 
estadd local Samp  	 "2001-2014" 
estadd local FE  	  "FIPSxY"  
estadd ysumm
eststo: quietly reghdfe `m' $COUNT5_at $X_at lag_ct_plasma , absorb( $Absorb ) vce(robust) 
estadd local Samp  	 "2001-2014" 
estadd local FE  	  "FIPSxY"  
estadd ysumm
}
		esttab using "$nets/OpenCloseRegs.csv", replace cells(b(star fmt(3)) se(par fmt(3)))   ///
		stats(N r2 ymean Samp FE , label(N "R2" "Y-mean" "Sample" "F.E.") fmt(0 2 3 0 0))    ///
		drop( *cons* $X_at ) order(  $COUNT4_at  $COUNT5_at lag_ct_plasma )  ///
		star(* 0.10 ** 0.05 *** 0.01) compress 
		
 

*Table B1, Panel B: SafeGraph cross-sectional
use final_location_analysis_data, clear
keep if year>=2018
tab year
collapse (max) plasma_open,  by(tract)
tab plasma_open
rename plasma_open plasma_open_2018_2021 // records a 1 if either year had an opening
g plasma_open_2018_2021_x100 = plasma_open_2018_2021*100
merge 1:m tract using "$nets/final_location_analysis_data"
drop _merge 
keep if year==2018 // the year of the control condition, which is just the equal cross-section

* Regressions: within a county, controling for tract-level socio-demographic differences, which existing establishment types predict that a tract will get a plasma center opening
global Absorb fips   // state#year
global X_at   pvf100_p log_inc_hhmean aid_p pubai_p ownocc_p white_p educ_bach_p empt_ft_p log_pop2sqkm 
global COUNT1_a  ct_NonBankLender2018 lag_ct_plasma
global COUNT3_a  ct_NonBankLender2018 ct_AllEstab2018 lag_ct_plasma
global COUNT4_a  ct_NonBankLender2018 ct_Bank2018 ct_Restaurants2018 ct_GroceryStores2018 ct_BarsLiquor2018 ct_GasStations2018 lag_ct_plasma

local pref   plasma_open_2018_2021_x100
eststo clear 
foreach m of local pref {
eststo: quietly reghdfe `m' $COUNT1_a $X_at , absorb( $Absorb ) vce(robust) 
estadd local Samp  	 "2018-2020" 
estadd local FE  	  "FIPS"  
estadd ysumm
eststo: quietly reghdfe `m' $COUNT3_a $X_at , absorb( $Absorb ) vce(robust) 
estadd local Samp  	 "2018-2020" 
estadd local FE  	  "FIPS"  
estadd ysumm
eststo: quietly reghdfe `m' $COUNT4_a $X_at , absorb( $Absorb ) vce(robust) 
estadd local Samp  	 "2018-2020" 
estadd local FE  	  "FIPS"  
estadd ysumm
}
		esttab using "$nets/OpenCloseRegs_2019_2020.csv", replace cells(b(star fmt(3)) se(par fmt(3)))   ///
		stats(N r2 ymean Samp FE , label(N "R2" "Y-mean" "Sample" "F.E.") fmt(0 2 3 0 0))    ///
		drop( *cons* $X_at ) order(  $COUNT3_a $COUNT4_a  )  ///
		star(* 0.10 ** 0.05 *** 0.01) compress 
 
